*last updated 18 March 2021 by charlotte ambrozek
*imports excel spreadsheets from FNS containing WIC redemptions from TIP data at ZIP/fiscal year level
*ZIP codes with less than 10 vendors are redacted (these are only large WIC ZIP codes)
clear all
set more off

local proj_dir ~
local data_dir ./data/cleaned
local raw_dir ./data/raw
*use corrected version of the public use file that divides ME, MD, and MI by 100 for 2006 sincethose numbers seemed off by orders of magnitude; also NM for 2005 and 2006 and LA 71202 for 2009
forval y = 2005/2008{
	mkf fy`y'wicred
	cwf fy`y'wicred
	import excel "`raw_dir'/Annual WIC Redemptions by Zipcode FY2005-2008_Public UseCAcorrected.xlsx", sheet("`y'") firstrow
	replace VendorState = VendorState[_n-1] if missing(VendorState) & !missing(VendorState[_n-1])
	replace AnnualWICRedemptions = subinstr(AnnualWICRedemptions, ",", "", .)
	destring VendorZip FiscalYear AnnualWICRedemptions, replace
}

forval y = 2009/2017{
	mkf fy`y'wicred
	cwf fy`y'wicred
	import excel "`raw_dir'/Annual WIC Redepmtions by Zip Code FY2009-2017_Public UseCAcorrected.xlsx", sheet("`y'") firstrow
	drop AgencyCode 
	gen FiscalYear = `y'
	gen source = "old"
}
*new data from FNS (2020) overlaps in FY2017 with data received in 2018; compare these and reconcile potential differences
forval y = 2017/2018{
	mkf fy`y'red
	cwf fy`y'red
	import excel "`raw_dir'/Annual WIC Redemptions by Zipcode FY2017-2018_Public Use.xlsx", sheet("`y'") firstrow
	rename (State Zipcode) (VendorState VendorZip)
	gen source = "new"
}
*load old version, compare to new
cwf fy2017wicred
frlink 1:1 VendorZip, frame(fy2017red) generate(check_link)
frget newred = AnnualWICRedemptions, from(check_link)
compare newred AnnualWICRedemptions
*7 obs in old version (sent in 2018 from FNS) that are not in new version (sent 2020 from FNS)
*4 obs larger in new redemptions than in old redemptions, 3 larger in old version than in new versiomn
*figure out what to do about obs that are not the same across versions received from FNS (mean, old value, new value?)
*check other direction 
drop check_link newred 

*load new version, compare to old
cwf fy2017red 

frlink 1:1 VendorZip, frame(fy2017wicred) generate(check_link)
frget oldred = AnnualWICRedemptions, from(check_link)
*1 obs in old version that is not in new version
compare oldred AnnualWICRedemptions
drop check_link oldred 

*append all frames together
cwf fy2005wicred
frame put _all, into(oldwicreds)
cwf oldwicreds
forval i = 2006/2017{
	frameappend fy`i'wicred
}
frame put _all, into(wicreds)
cwf wicreds 
forval i = 2017/2018{
	frameappend fy`i'red
}
*handle duplicated values
drop E 
drop if VendorState == "AS" | VendorState == "PR"
*in general, go with new (2020) data from FNS where there are discrepancies, assuming that it reflects ability to revise redemption values over time with new data from states 
drop if FiscalYear == 2017 & source == "old"  & inlist(VendorZip, 20782, 48126, 48209, 60629, 90011, 17603)
drop source

duplicates drop

duplicates report VendorZip FiscalYear
assert r(N) == r(unique_value)
rename (VendorZip FiscalYear AnnualWICRedemptions VendorState) (zip fiscalyear wic_redemptions state)
gen str_zip = string(zip)
replace str_zip = "00" + str_zip if strlen(str_zip) == 3
replace str_zip = "0" + str_zip if strlen(str_zip) == 4

*square up the frame
frame put zip str_zip, into(zip_fy_sq)
cwf zip_fy_sq
duplicates drop 
expand 14, generate(new)
bys zip (new): gen fiscalyear = sum(new)
replace fiscalyear = fiscalyear + 2005
drop new 

frlink 1:1 zip fiscalyear, frame(wicreds) generate(bal_link)
frget wic_redemptions state, from(bal_link)
bys zip: egen state_mode = mode(state)
replace state = state_mode if missing(state) & !missing(state_mode)
drop state_mode bal_link
compress
save `data_dir'/tip_wic_redemptions_05_18, replace

*make a balanced redemptions version (zips for which we observe non missing redemptions for all years)
gen miss = missing(wic_redemptions)
bys zip: egen flag = max(miss)
drop if flag == 1
drop flag miss 
compress
save `data_dir'/tip_wic_bal_redemptions_05_18, replace
